
* Root folder holding the raw/ and clean/ data used below. Define the global
* cnd_data before running this do-file to point it at another copy of the data;
* when it is not defined, the original author's Dropbox path is used.
if `"$cnd_data"' == "" {
	glo cnd_data "/Users/magdalena/Dropbox (Harvard University)/Coursework/Harvard/2022 Fall/Gov 2001/Climate conflict/data"
}

*GDIS: Natural Disasters
* Builds an adm1-by-year table with one disaster-count column per disaster type,
* saves it, and exports one row per adm1 unit (name + mean coordinates) as CSV.
import delimited "$cnd_data/raw/naturaldisasters/pend-gdis-1960-2018-disasterlocations.csv", delimiter("", collapse) bindquote(strict) encoding(ISO-8859-2) clear 
* -stable- keeps tied rows in their file order. A plain -sort- breaks ties at
* random, which made the (first) latitude/longitude picked below vary from run
* to run. NOTE(review): this assumes -collapse- keeps the within-group order.
sort country year, stable
encode disastertype, gen(disastertype_id)
* One row per adm1 x year x disaster type; n_natdis counts non-missing geo_id.
collapse (first) latitude longitude (count) n_natdis=geo_id , by (adm1 country year disastertype_id)
* A single coordinate pair per adm1 unit: the mean over all of its rows.
bys adm1 country: egen lat=mean(latitude)
bys adm1 country: egen lon=mean(longitude)
drop latitude longitude 
* Go wide: n_natdis<k> is the count for disaster type code k in that adm1-year.
egen loc_year=group(adm1 country year)
reshape wide n_natdis, i(loc_year) j(disastertype_id) 
* adm1_id is the key used by the externally produced code file merged later in
* this do-file, so the grouping variables here must not change.
egen adm1_id=group(adm1 country)
egen total_natdis=rowtotal(n_natdis?)
* Labels assume -encode- numbered the disaster types 1-8 in this (alphabetical)
* order -- verify with "label list disastertype_id" if the input file changes.
label var n_natdis1 "Number of droughts"
label var n_natdis2 "Number of earthquakes"
label var n_natdis3 "Number of extreme temperature events"
label var n_natdis4 "Number of floods"
label var n_natdis5 "Number of landslides"
label var n_natdis6 "Number of mass movements (dry)"
label var n_natdis7 "Number of storms"
label var n_natdis8 "Number of volcanic activity"

save "$cnd_data/raw/naturaldisasters/GDIS_adm1_year.dta" , replace
* Export the list of adm1 units without disturbing the data in memory.
preserve
collapse (first) country adm1 lat lon , by(adm1_id)
sort country adm1
export delimited using "$cnd_data/raw/naturaldisasters/gdis_adm1.csv", replace
restore

* GDIS adm1-year table, coverage counts:
*   200 countries
*   2931 adm1 regions
*   59 years

*UPPSALA: Violent Conflict
* Reads the GED event file and aggregates it to adm1 x year, with one set of
* deaths / conflict-days / event-count columns per type_of_violence code.
import delimited "$cnd_data/raw/conflict/GEDEvent_v22_1.csv", bindquote(strict) encoding(UTF-8) maxquotedrows(100) clear 

* For each timestamp string keep only the first space-separated piece and turn
* it into a Stata daily date under the original variable name.
foreach edge in start end {
	split date_`edge'
	drop date_`edge'2 date_`edge'
	generate date_`edge' = date(date_`edge'1, "20YMD")
}
* Event length in days, counting both the start and the end day.
generate conflictdays = (date_end - date_start) + 1

* Events without an adm_1 name are flagged and filed under the country name.
// Open question: should country-level events/deaths/days be assigned to all
// provinces of that country (both those missing here and those in this dataset)?
// These are 915 out of 13,362 observations -- is this measurement error, or did
// they affect the whole country? Investigate.
// For now the flagged rows are excluded further down via "drop if flag_missadm1".
generate flag_missadm1 = missing(adm_1)
replace adm_1 = country if flag_missadm1

* One row per adm1 x year x violence type.
collapse (sum)   total_deaths=best conflictdays ///
         (count) n_events=id ///
         (first) latitude longitude country_id region flag_missadm1 , ///
         by(adm_1 country year type_of_violence)

* Mean coordinates over all rows of the same adm1 unit.
bysort adm_1 country: egen lat = mean(latitude)
bysort adm_1 country: egen lon = mean(longitude)
drop latitude longitude

* Go wide on violence type: <stub>1, <stub>2, <stub>3.
egen loc_year_id = group(adm_1 country year)
reshape wide total_deaths conflictdays n_events, i(loc_year_id) j(type_of_violence) 
sort country adm_1 year
egen adm1_id = group(adm_1 country)

* Row totals across the three violence types (rowtotal treats missing as zero).
egen total_events = rowtotal(n_events1 n_events2 n_events3)
egen total_deaths = rowtotal(total_deaths1 total_deaths2 total_deaths3)
egen total_days   = rowtotal(conflictdays1 conflictdays2 conflictdays3)

* Variable labels for the per-type conflict columns; the position in the list
* is the type_of_violence code (1, 2, 3).
local code = 0
foreach kind in "state-based" "non-state" "one-sided" {
	local ++code
	label variable n_events`code'     "Number of `kind' conflict events"
	label variable total_deaths`code' "Number of `kind' conflict deaths"
	label variable conflictdays`code' "Number of `kind' conflict days"
}
* 1) Rows whose adm_1 was missing (filed under the country name) go to their own file.
preserve
	keep if flag_missadm1
	save "$cnd_data/raw/conflict/GED_country-only_year.dta" , replace
restore

* 2) The full adm1-year table, country-level rows included.
save "$cnd_data/raw/conflict/GED_adm1_year.dta" , replace
*123 countries
*1687 regions
*33 years

* 3) One row per real adm1 unit (name + mean coordinates) for geocoding.
*    The CSV was used as input in QGIS with the function "add polygon attributes
*    to points" and a shapefile of world adm1 units to assign codes.
preserve
	keep if !flag_missadm1
	collapse (first) country adm_1 lat lon , by(adm1_id)
	sort country adm_1
	export delimited using "$cnd_data/raw/conflict/ged_adm1.csv", replace
restore


*Create one dataset

* Load the adm1_id -> adm1_code lookup for GED into a tempfile.
preserve
import delimited "$cnd_data/raw/conflict/ged_codes.csv", clear 
tempfile ged_codes
save `ged_codes'
restore

* Attach adm1_code to the GED adm1-year table and re-aggregate to code x year.
use "$cnd_data/raw/conflict/GED_adm1_year.dta" , clear
drop if flag_missadm1
* keep(master match): lookup rows with no GED data would otherwise enter with a
* missing year and survive into the clean file and the panel built from it.
merge m:1 adm1_id using `ged_codes' , keep(master match) nogen
* adm_1 is added as a tie-breaker so the (first) values taken by -collapse- do
* not depend on how -sort- happens to order units that share a code and year.
sort adm1_code country year adm_1
order adm1_code year adm_1 country region lat lon total_deaths total_days total_events total_deaths? conflictdays? n_events?
* Units without a code cannot be placed in the panel.
drop if adm1_code==""
collapse (first) adm_1 country region lat lon (sum) total_deaths total_days total_events total_deaths? conflictdays? n_events? , by(adm1_code year)
save "$cnd_data/clean/GED_adm1_year.dta" , replace


// preserve //there are 39 adm1_code missing because they fall in the sea instead of the polygon. need to bring these back -- dropping them for now
// keep if adm1_code==""
// collapse (first) lat lon country , by(adm_1)
// restore

* Load the adm1_id -> adm1_code lookup for GDIS into a tempfile.
preserve
import delimited "$cnd_data/raw/naturaldisasters/gdis_code.csv", clear 
tempfile gdis_codes
save `gdis_codes'
restore

* Attach adm1_code to the GDIS adm1-year table and re-aggregate to code x year.
use "$cnd_data/raw/naturaldisasters/GDIS_adm1_year.dta" , clear
* keep(master match): lookup rows with no GDIS data would otherwise enter with a
* missing year, and "keep if year >= 1989" further down does not remove them.
merge m:1 adm1_id using `gdis_codes' , keep(master match) nogen //also 107 adm1_code missing (lots of islands)
* adm1 is added as a tie-breaker so the (first) values taken by -collapse- do
* not depend on how -sort- happens to order units that share a code and year.
sort adm1_code country year adm1
order adm1_code year adm1 country lat lon total_natdis n_natdis?
* Units without a code cannot be placed in the panel.
drop if adm1_code==""
collapse (first) adm1 country lat lon (sum) total_natdis n_natdis? , by(adm1_code year)
save "$cnd_data/clean/GDIS_adm1_year.dta" , replace

* Combine the disaster table in memory with the conflict table into a balanced
* adm1_code x year panel, stored in the tempfile `outcome_panel'.

* inrange(year, 1989, .) is false for a missing year, whereas "year >= 1989"
* is true for it because Stata treats missing as larger than any number.
keep if inrange(year, 1989, .)

merge 1:1 adm1_code year using "$cnd_data/clean/GED_adm1_year.dta"
rename _merge _mergeoutcomes
* Rows without a year cannot be placed in the panel; further down missing years
* are recoded to 1989, which could duplicate an existing unit-year.
drop if missing(year)

* xtset needs a numeric panel id; tsfill, full then adds every unit-year
* combination that is absent, with all other variables missing.
encode adm1_code, gen(adm1_vals)
xtset adm1_vals year
tsfill , full
* adm1_code is empty on the added rows, so rebuild it from the numeric id.
drop adm1_code
decode adm1_vals , gen(adm1_code)
order adm1_vals adm1_code
tempfile outcome_panel
save `outcome_panel'


* Bring in the full list of adm1 units so that units with no disaster or
* conflict record also appear in the panel, then re-balance the panel.
import delimited "$cnd_data/raw/all_adm1.csv", clear 
keep adm1_code type_en latitude longitude adm0_a3 gns_name
merge 1:m adm1_code using `outcome_panel'
* Units found only in the adm1 list have no year yet; give them one year so
* that tsfill can expand them to the full range.
replace year=1989 if year==.
* The numeric id has to be rebuilt because new adm1 codes were added.
drop adm1_vals
encode adm1_code, gen(adm1_vals)
xtset adm1_vals year
tsfill , full
drop adm1_code
decode adm1_vals , gen(adm1_code)
order adm1_vals adm1_code year
* Names and coordinates from the GDIS/GED tables are dropped in favour of the
* attributes from the adm1 list.
drop adm1 country lat lon adm_1 _merge
* Rows added by tsfill have no coordinates: copy them from the previous year.
replace latitude = L.latitude if latitude >=.
replace longitude = L.longitude if longitude >=.
encode adm0_a3 , gen(country)
encode region , gen(region_id)
* Spread the country code over all years of each unit (non-missing sorts first).
bysort adm1_vals (country) : replace country = country[_n-1] if missing(country) 
* Spread the region over all rows of each country. Rows whose country is still
* missing are skipped: they would otherwise be treated as one "country" and
* pick up the region of an unrelated unit.
bysort country (region_id) : replace region_id = region_id[_n-1] if missing(region_id) & !missing(country)
decode country , gen(iso3)
drop adm0_a3 region

* Fill counts, label variables, and build forward-looking conflict outcomes and
* disaster indicators.

* Last year with any disaster record in the panel (the GDIS input file is named
* 1960-2018). Must be computed before the zero fill below.
summarize year if !missing(total_natdis), meanonly
local gdis_last_year = r(max)

* No conflict record in a unit-year means zero conflict.
foreach var of varlist total_deaths* total_days total_events conflictdays? n_events? {
	replace `var' = 0 if missing(`var')
}
* No disaster record means zero disasters only inside the years GDIS covers.
* The panel extends beyond that through the conflict data; those later years
* are left missing rather than coded as "no disaster".
foreach var of varlist total_natdis n_natdis? {
	replace `var' = 0 if missing(`var') & year <= `gdis_last_year'
}

* Labels assume disaster type codes 1-8 are in the alphabetical order assigned
* by -encode- when the GDIS table was built.
label var n_natdis1 "Number of droughts"
label var n_natdis2 "Number of earthquakes"
label var n_natdis3 "Number of extreme temperature events"
label var n_natdis4 "Number of floods"
label var n_natdis5 "Number of landslides"
label var n_natdis6 "Number of mass movements (dry)"
label var n_natdis7 "Number of storms"
label var n_natdis8 "Number of volcanic activity"

loc name_1 "state-based"
loc name_2 "non-state"
loc name_3 "one-sided"
forval i=1/3 {
	label var n_events`i' "Number of `name_`i'' conflict events"
	label var total_deaths`i' "Number of `name_`i'' conflict deaths"
	label var conflictdays`i' "Number of `name_`i'' conflict days"
}

* conflict_t_h = total conflict events h years AHEAD of the current row
* (missing when that year lies beyond the end of the panel).
sort adm1_vals year
forvalues h = 1/5 {
	gen conflict_t_`h' = F`h'.total_events
}

* Any conflict within the next 1 / 3 / 5 years; missing unless every lead in
* the window is observed.
gen anyconflict_1y = (conflict_t_1 > 0) if !missing(conflict_t_1)
gen anyconflict_3y = (conflict_t_1 > 0 | conflict_t_2 > 0 | conflict_t_3 > 0) ///
	if !missing(conflict_t_1, conflict_t_2, conflict_t_3)
gen anyconflict_5y = (conflict_t_1 > 0 | conflict_t_2 > 0 | conflict_t_3 > 0 | conflict_t_4 > 0 | conflict_t_5 > 0) ///
	if !missing(conflict_t_1, conflict_t_2, conflict_t_3, conflict_t_4, conflict_t_5)

* Disaster indicators. The !missing() guard matters because a missing count
* compares as greater than zero and would otherwise be coded 1.
gen anynatdis = total_natdis > 0 if !missing(total_natdis)
local k = 0
foreach stub in drought earthquakes exttemp flood landslide massmov storm volcact {
	local ++k
	gen any_`stub' = n_natdis`k' > 0 if !missing(n_natdis`k')
}

* Attach the country-level attributes from all_regions.csv (matched on iso3)
* and write the final panel.
tempfile regions
preserve
	import delimited "$cnd_data/raw/all_regions.csv", encoding(UTF-8) clear 
	rename alpha3 iso3
	save `regions'
restore

* keep(1 3) = master-only and matched rows; lookup rows with no panel
* counterpart are discarded.
merge m:1 iso3 using `regions' , keep(1 3) // 17 countries without region data
save "$cnd_data/clean/climate_conflict_panel.dta" , replace


